import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt
import plotly.graph_objects as go
# Load the dataset; the first row of the CSV supplies the column names.
df = pd.read_csv("Estonian citizens abroad.csv", header=0)

# Preview the first rows (shown as the cell output when run in a notebook).
df.head()
| | Nimi | Name | iso_alpha | # of Estonian citizens | Continent | Sub-region | Capital | Latitude (capital) | Longitude (capital) | Distance between Capital and Tallinn (km) | Population (2020) | GDP PPP per capita | Formerly part of the USSR? | Sovereignty |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Venezuela | Venezuela | VEN | 237 | Americas | South America | Caracas | 10.500000 | -66.933300 | 9098.0 | 28435943 | NaN | No | Sovereign |
| 1 | Monaco | Monaco | MCO | 28 | Europe | Western Europe | Monaco | 43.739600 | 7.406900 | 2103.1 | 39244 | NaN | No | Sovereign |
| 2 | Taiwan (Chn) | Taiwan | TWN | 12 | Asia | Eastern Asia | Taipei | 25.047800 | 121.531900 | 7996.5 | 23816775 | NaN | No | Sovereign |
| 3 | Jersey (Gbr) | Jersey | JEY | 5 | Europe | Northern Europe | Saint Helier | 49.185800 | -2.110000 | 2059.7 | 107800 | NaN | No | Not sovereign |
| 4 | Ühendriikide Hajasaared (Usa) | United States Minor Outlying Islands | UMI | 5 | Oceania | Micronesia | Wake Atoll | 19.308552 | 166.631012 | 10598.8 | 300 | NaN | No | Not sovereign |
# Column with the number of Estonian citizens per row; it drives marker
# size/colour on the map and the y-axis of the scatter plots.
target_label = "# of Estonian citizens"
target = df[target_label]

name_label = "Name"
name = df[name_label]

# Hover label combining the name and the citizen count on two lines.
# Plotly renders text as pseudo-HTML and collapses "\n", so the line break
# has to be written as "<br>" to actually show up in the hover box.
name_target_label = "Name and number of citizens"
df[name_target_label] = name.map(str) + "<br>" + target.map(str)
name_target = df[name_target_label]

# Min-max normalize the capital-to-Tallinn distance: the smallest distance
# maps to 0 and the largest to 1.
distance = df["Distance between Capital and Tallinn (km)"]
distance_norm = (distance - distance.min()) / (distance.max() - distance.min())
df["Distance normalized"] = distance_norm
# Bubble map: one marker per row, positioned at the capital's coordinates.
fig = go.Figure(
    go.Scattergeo(
        lon=df["Longitude (capital)"],
        lat=df["Latitude (capital)"],
        text=df[name_target_label],
        marker={
            # An exponent below 1 compresses the range of marker sizes so the
            # largest counts do not dwarf the smallest ones.
            "size": df[target_label] ** 0.36,
            "color": df[target_label],
            "colorscale": "plasma",
            "colorbar_title": target_label,
        },
    )
)

# Base map styling: hide the default layers, then switch country borders
# and land fill back on.
fig.update_geos(
    projection_type="natural earth",
    resolution=50,
    visible=False,
    showcountries=True,
    showland=True,
    landcolor="LightBlue",
)

# Show only the combined name/count label on hover; the empty <extra> tag
# suppresses the secondary trace-name box.
fig.update_traces(
    customdata=df[name_target_label],
    hovertemplate="%{customdata}<extra></extra>",
)
fig.show()
# Scatter plot: citizen count (log scale) against the normalized distance
# between each capital and Tallinn, coloured by continent.
fig = px.scatter(
    df,
    x="Distance normalized",
    y=target_label,
    log_y=True,
    color="Continent",
    hover_name="Name",
    # Hover shows only the citizen count, formatted as a whole number.
    hover_data={
        "Distance normalized": False,
        "Continent": False,
        target_label: ":.0f",
    },
    # Axis / legend captions.
    labels={
        target_label: "# of Estonian citizens",
        "Distance normalized": "Distance between country capital and Tallinn (normalized)",
    },
)
fig.show()
# Min-max scale GDP PPP per capita: lowest value -> 0, highest -> 1.
# Rows without a GDP figure keep NaN in the normalized column.
GDP = df["GDP PPP per capita"]
GDP_norm = GDP.sub(GDP.min()).div(GDP.max() - GDP.min())
df["GDP PPP per capita normalized"] = GDP_norm

# Display the rows ordered from highest to lowest normalized GDP; the sorted
# copy is only shown as cell output, df itself keeps its original order.
df.sort_values("GDP PPP per capita normalized", ascending=False)
| | Nimi | Name | iso_alpha | # of Estonian citizens | Continent | Sub-region | Capital | Latitude (capital) | Longitude (capital) | Distance between Capital and Tallinn (km) | Population (2020) | GDP PPP per capita | Formerly part of the USSR? | Sovereignty | Name and number of citizens | Distance normalized | GDP PPP per capita normalized |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 19 | Luksemburg | Luxembourg | LUX | 280 | Europe | Western Europe | Luxembourg | 49.610600 | 6.132800 | 1613.6 | 625976 | 117500.20720 | No | Sovereign | Luxembourg\n280 | 0.089800 | 1.000000 |
| 20 | Singapur | Singapore | SGP | 33 | Asia | South-eastern Asia | Singapore | 1.300000 | 103.800000 | 9266.5 | 5850343 | 98520.02954 | No | Sovereign | Singapore\n33 | 0.538738 | 0.836736 |
| 21 | Iirimaa | Ireland | IRL | 2469 | Europe | Northern Europe | Dublin | 53.349700 | -6.260300 | 2002.8 | 4937796 | 93180.94540 | No | Sovereign | Ireland\n2469 | 0.112632 | 0.790810 |
| 22 | Katar | Qatar | QAT | 8 | Asia | Western Asia | Doha | 25.300000 | 51.533300 | 4324.3 | 2881060 | 89968.77102 | No | Sovereign | Qatar\n8 | 0.248816 | 0.763180 |
| 23 | Bermuda (Gbr) | Bermuda | BMU | 3 | Americas | Northern America | Hamilton | 32.294200 | -64.783900 | 6936.5 | 62273 | 80829.61681 | No | Not sovereign | Bermuda\n3 | 0.402054 | 0.684566 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 14 | Ahvenamaa | Åland Islands | ALA | 1 | Europe | Northern Europe | Mariehamn | 60.098600 | 19.944400 | 279.1 | 30129 | NaN | No | Not sovereign | Åland Islands\n1 | 0.011515 | NaN |
| 15 | Antarktis | Antarctica | ATA | 1 | Antarctica | Antarctica | NaN | -75.250973 | -0.071389 | 15251.9 | 4877 | NaN | No | Not sovereign | Antarctica\n1 | 0.889856 | NaN |
| 16 | Réunion (Fra) | Réunion | REU | 1 | Africa | Eastern Africa | Saint-Denis | -20.878900 | 55.448100 | 9359.0 | 895308 | NaN | No | Not sovereign | Réunion\n1 | 0.544164 | NaN |
| 17 | Svalbard ja Jan Mayen (Nor) | Svalbard and Jan Mayen | SJM | 1 | Europe | Northern Europe | Longyearbyen | 78.216700 | 15.633300 | 2114.0 | 2939 | NaN | No | Not sovereign | Svalbard and Jan Mayen\n1 | 0.119155 | NaN |
| 18 | Wallis ja Futuna (Fra) | Wallis and Futuna | WLF | 1 | Oceania | Polynesia | Mata-Utu | -13.282500 | -176.173600 | 14600.7 | 11246 | NaN | No | Not sovereign | Wallis and Futuna\n1 | 0.851655 | NaN |
135 rows × 17 columns
# Scatter plot: citizen count (log scale) against GDP PPP per capita,
# coloured by continent.
fig = px.scatter(
    df,
    x="GDP PPP per capita",
    y=target_label,
    log_y=True,
    color="Continent",
    hover_name="Name",
    # Hover shows only the GDP figure, formatted as a whole number.
    hover_data={
        "GDP PPP per capita": ":.0f",
        "Continent": False,
        target_label: False,
    },
    # Axis / legend captions.
    labels={
        target_label: "# of Estonian citizens",
        "GDP PPP per capita": "GDP PPP per capita (INT USD, 2020)",
    },
)
fig.show()